/*
	Purpose: Appends cleaned survey data, cleaned 1940 Census data, and 
	         cleaned 1936 data. Will be used for TSIV analysis exercises.

	Note: 1940 Census data was downloaded from the directory 
	      /homes/data/cens1940/1940_2-0/100files/ on the 
          NBER server. The sample was restricted to fathers ages 30-50.

          Download all 1940 output files and place them in the Census output folder.
          Once that has been done, all paths in this do-file are correct.
          If you choose to run this file on the server, you will have to change the paths.

	Creates: TSIV_2Samples_1940Census.dta
*/

clear
set more off

	noisily display "Appending surveys to Census (with 1936)"

* Load the pooled, cleaned surveys, restricted to the baseline sample
	use if baseline_sample==1 using "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
	generate surveys = 1    // marks every row that came from the pooled surveys

* Father-income variables that travel with the core covariates
* (the trailing * picks up every variable whose name starts with log_father_closest)
	local vars "log_father_closest* father_HHinc_1936fix log_father_HHinc_1936fix"

* Trim the survey data to the variables needed downstream
	keep race south_merge sex fatheroccej          ///
	     fam_inc_real log_son_baseline             ///
	     age agesq dob edu                         ///
	     wgt_sex_race survey_year decade surveys   ///
	     `vars'

* Stack the 1940 Census extract underneath the survey rows
	append using "$CensusData/input/Census1940_fathers_ages30to50.dta"

* Code census as 0 wherever it is still system-missing after the append
* (presumably the survey rows -- verify that census is set only in the Census file)
	replace census = 0 if census == .
		
* Append 1936 farmer and self-employment data
* The combined survey + Census data are set aside with preserve while the
* 1936 subset is built, then brought back with restore before appending.
	preserve

	* Load only the variables needed from the 1936 consumption survey
	use occ1950ej tot_fam_inc race south_merge age agesq ///
		using "$Survey1936/output/ConsumptionSurvey_1936_foranalysis.dta", clear

	* Harmonise the occupation variable name with the main data
	rename occ1950ej fatheroccej

	* Keep occupation codes 81 and 21 only, and discard zero-income records
	keep if inlist(fatheroccej, 81, 21)
	keep if tot_fam_inc != 0

	generate survey1936 = 1    // marks every row that came from the 1936 survey

	tempfile survey1936_subset
	save `survey1936_subset'

	restore

	append using `survey1936_subset'

	* Cross-tabulate the source flags, showing missing values
	tab census survey1936, m
		
* Additional variables for TSIV
* problem_occs flags father occupation codes 21 and 81, the same codes kept
* from the 1936 survey. It is 0 for every other row, including rows where
* fatheroccej is missing.
	gen problem_occs = fatheroccej==21 | fatheroccej==81
	tab census problem_occs, m

* Combined income measure:
*   - Census rows outside the problem occupations take Census hh_income
*   - 1936 survey rows in the problem occupations take tot_fam_inc
*   - every other row is left missing
	clonevar hh_income_1936fix = hh_income if census==1 & problem_occs!=1
	replace hh_income_1936fix = tot_fam_inc if survey1936==1 & problem_occs==1
	label var hh_income_1936fix "Household income, Census and 1936 fix"

* log() returns missing for non-positive or missing income
	gen log_father_income_1936 = log(hh_income_1936fix)
	label var log_father_income_1936 "Log household income, Census, 1936 fix"

* Label the indicator variables
	label variable surveys      "Baseline sample surveys"
	label variable survey1936   "1936 Survey"
	label variable problem_occs "Problematic occs. in 1940 Census"

* Exclude non-working fathers (occupation code 99); rows with a missing
* occupation code are retained
	keep if fatheroccej != 99

* Save dataset
	compress
	save "$Mydirectory1/3_Output/TSIV_2Samples_1940Census.dta", replace

